Skip to content

Conversation

jorisvandenbossche
Copy link
Member

#36839 added an extensive benchmark class for select_dtypes (dtypes.py::SelectDtypes), but we actually already had a benchmark in frame_methods.py. Now, the new ones are more extensive, testing multiple dtypes and both include/exclude, instead of only including int, so I think the one in frame_methods can be safely removed to avoid duplication of benchmarks:

class SelectDtypes:
try:
params = [
tm.ALL_INT_NUMPY_DTYPES
+ tm.ALL_INT_EA_DTYPES
+ tm.FLOAT_NUMPY_DTYPES
+ tm.COMPLEX_DTYPES
+ tm.DATETIME64_DTYPES
+ tm.TIMEDELTA64_DTYPES
+ tm.BOOL_DTYPES
]
except AttributeError:
params = [
tm.ALL_INT_DTYPES
+ tm.ALL_EA_INT_DTYPES
+ tm.FLOAT_DTYPES
+ tm.COMPLEX_DTYPES
+ tm.DATETIME64_DTYPES
+ tm.TIMEDELTA64_DTYPES
+ tm.BOOL_DTYPES
]
param_names = ["dtype"]
def setup(self, dtype):
N, K = 5000, 50
self.index = tm.makeStringIndex(N)
self.columns = tm.makeStringIndex(K)
def create_df(data):
return DataFrame(data, index=self.index, columns=self.columns)
self.df_int = create_df(np.random.randint(low=100, size=(N, K)))
self.df_float = create_df(np.random.randn(N, K))
self.df_bool = create_df(np.random.choice([True, False], size=(N, K)))
self.df_string = create_df(
np.random.choice(list(string.ascii_letters), size=(N, K))
)
def time_select_dtype_int_include(self, dtype):
self.df_int.select_dtypes(include=dtype)
def time_select_dtype_int_exclude(self, dtype):
self.df_int.select_dtypes(exclude=dtype)
def time_select_dtype_float_include(self, dtype):
self.df_float.select_dtypes(include=dtype)
def time_select_dtype_float_exclude(self, dtype):
self.df_float.select_dtypes(exclude=dtype)
def time_select_dtype_bool_include(self, dtype):
self.df_bool.select_dtypes(include=dtype)
def time_select_dtype_bool_exclude(self, dtype):
self.df_bool.select_dtypes(exclude=dtype)
def time_select_dtype_string_include(self, dtype):
self.df_string.select_dtypes(include=dtype)
def time_select_dtype_string_exclude(self, dtype):
self.df_string.select_dtypes(exclude=dtype)

@jorisvandenbossche jorisvandenbossche added the Benchmark Performance (ASV) benchmarks label Nov 29, 2021
@jorisvandenbossche jorisvandenbossche added this to the 1.4 milestone Nov 29, 2021
@jreback jreback merged commit 3839dfe into pandas-dev:master Nov 29, 2021
@jreback
Copy link
Contributor

jreback commented Nov 29, 2021

thanks @jorisvandenbossche

@jorisvandenbossche jorisvandenbossche deleted the asv-remove-selectdtypes branch November 29, 2021 14:48
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Benchmark Performance (ASV) benchmarks
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants